VAST Challenge 3
With reference to Challenge 3 of VAST Challenge 2022, you are required to reveal the economic of the city of Engagement, Ohio USA by using appropriate static and interactive statistical graphics methods
This exercise requires us to apply the skills you had learned in
Lesson 1 and Hands-on Exercise 1 to reveal the demographic of the city
of Engagement, Ohio USA by using appropriate static statistical graphics
methods. The data should be processed by using appropriate tidyverse
family of packages and the statistical graphics must be prepared using
ggplot2 and its extensions.
financial <- read_csv('./data/FinancialJournal.csv')
participant_data <- read_csv('./data/Participants.csv')
financial$DateTime <- as.POSIXct(financial$timestamp, format="%Y-%m-%d %H:%M:%S")
financial$year <- format(financial$DateTime, format="%Y")
financial$month <- format(financial$DateTime, format="%m")
financial$hour <- format(financial$DateTime, format="%H")
spending_ts_cat <- financial %>%
group_by(month,category) %>%
filter(!category %in% c('Wage', 'RentAdjustment')) %>%
summarise(mean_amount = - mean(amount))
spending_ts_total <- financial %>%
group_by(month) %>%
filter(!category %in% c('Wage', 'RentAdjustment')) %>%
summarise(mean_amount = - mean(amount))
glimpse(spending_ts_total)
Rows: 12
Columns: 2
$ month <chr> "01", "02", "03", "04", "05", "06", "07", "08", …
$ mean_amount <dbl> 13.22082, 13.85779, 16.21479, 13.45410, 13.99095…
p1 <- ggplot(data=spending_ts_cat, aes(x=month,
y= mean_amount,
color=category,
group=category,
text = paste('</br>Month: ', month,
'</br>Mean spending: ', round(mean_amount,2),
'</br>Category: ', category))) +
geom_line()+
#geom_bar(position="dodge2", stat = "identity") +
facet_grid(category~. , scales = "free_y",space = "free")
p2 <- ggplot(data=spending_ts_total, aes(x=month,
y= mean_amount,
group=1,
text = paste('</br>Month: ', month,
'</br>Mean spending: ', round(mean_amount,2)
)
)
)+
geom_line()
ggplotly(p1,tooltip = "text")
ggplotly(p2,tooltip = "text")
Our data includes two csv files from the VAST data source, namely FinancialJournal.csv and Participants.csv. To show the financial health of Ohio city, we derived three supporting tables from the original data. Generally, we want to ananlyze the spending habits and wage status of people with different education background, age, and household size.
monthly_income <- financial %>%
filter(category %in% c('Wage', 'RentAdjustment')) %>%
group_by(year, month) %>%
summarise(income = mean(amount))
monthly_spend <- financial %>%
filter(!category %in% c('Wage', 'RentAdjustment')) %>%
group_by(year, month) %>%
summarise(spend = mean(abs(amount)))
monthly_finance_status <- merge(monthly_income,monthly_spend,by=c("year","month"))
monthly_finance_status$date <- paste(monthly_finance_status$year, monthly_finance_status$month, sep='-')
monthly_finance_status$spendRatio <- monthly_finance_status$spend / monthly_finance_status$income
monthly_finance_status$remain <- (monthly_finance_status$income - monthly_finance_status$spend)
monthly_finance_status$remain <- round(monthly_finance_status$remain, 1)
wage <- financial %>%
filter(category == "Wage") %>%
group_by(participantId) %>%
summarise(wage = mean(amount))
brks <- c(0, 100, 200, 300, 400, Inf)
grps <- c('<=100', '101-200', '201-300', '301-400', '>400')
wage$Wage_Group <- cut(wage$wage, breaks=brks, labels = grps, right = FALSE)
wage <- financial %>%
filter(category == 'Wage') %>%
group_by(participantId) %>%
summarise(wage = mean(amount))
brks <- c(0, 100, 200, 300, 400, Inf)
grps <- c('<=100', '101-200', '201-300', '301-400', '>400')
wage$Wage_Group <- cut(wage$wage, breaks=brks, labels = grps, right = FALSE)
#glimpse(wage)
Participants’s wage and personal information.
wage <- financial %>%
filter(category == "Wage") %>%
group_by(participantId) %>%
summarise(wage = mean(amount))
brks <- c(0, 100, 200, 300, 400, Inf)
grps <- c('<=100', '101-200', '201-300', '301-400', '>400')
wage$Wage_Group <- cut(wage$wage, breaks=brks, labels = grps, right = FALSE)
wage <- financial %>%
filter(category == 'Wage') %>%
group_by(participantId) %>%
summarise(wage = mean(amount))
brks <- c(0, 100, 200, 300, 400, Inf)
grps <- c('<=100', '101-200', '201-300', '301-400', '>400')
wage$Wage_Group <- cut(wage$wage, breaks=brks, labels = grps, right = FALSE)
#glimpse(wage)
brks <- c(0, 100, 200, 300, 400, Inf)
grps <- c('<=100', '101-200', '201-300', '301-400', '>400')
wage$Wage_Group <- cut(wage$wage, breaks=brks, labels = grps, right = FALSE)
# unique(wage$Wage_Group)
paged_table(financial, options = list(rows.print = 15, cols.print = 5))
month_consum <- financial %>%
filter(!category %in% c('RentAdjustment', 'Wage')) %>%
group_by(year, month, category) %>%
summarise(spend = sum(-amount))
#glimpse(month_consum)
p_consum <- financial %>%
filter(!category %in% c('RentAdjustment', 'Wage')) %>%
group_by(participantId, category) %>%
summarise(spend = sum(-amount))
#glimpse(p_consum)
participant_finance <- merge(participant_data, wage,by=c("participantId"))
#glimpse(participant_finance)
d <- highlight_key(wage)
p1 <- ggplot(data=d, aes(x=wage,fill=Wage_Group)) +
geom_histogram(position="dodge", binwidth=density(wage$wage)$bw) +
labs(y= 'No. People', x= 'Wage',
title = "Fig1: Wage Distribution",
subtitle = "Most people get 58 per month")
p2 <- ggplot(data=d, aes(x=wage)) +
geom_histogram(position="dodge",aes(y = ..density..), binwidth=density(wage$wage)$bw) +
labs(y= 'Density', x= 'Wage',
title = "Fig1: Wage Distribution",
subtitle = "Most people get 50 per month")
ggplotly(p1)